#include "HelixHough.h"
#include <cmath>
#include <algorithm>
#include <iostream>
#include <sys/time.h>

using namespace std;

void HelixHough::fillBins(unsigned int hit_counter, float* min_phi_a, float* max_phi_a, vector<SimpleHit3D>& four_hits, vector<vector<vector<unsigned int> > >& z_bins, unsigned int n_d, unsigned int n_k, unsigned int n_dzdl, unsigned int n_z0, unsigned int d_bin, unsigned int k_bin, unsigned int n_phi, unsigned int zoomlevel, float low_phi, float high_phi, float inv_phi_range)
{
  for(unsigned int i=0;i<hit_counter;++i)
  {
    if(min_phi_a[i] >= 0.)
    {
      if( ( max_phi_a[i] < low_phi ) || ( min_phi_a[i] > high_phi ) ){continue;}
      unsigned int low_bin = 0;
      unsigned int high_bin = (n_phi-1);
      if( min_phi_a[i] > low_phi )
      {
        low_bin = (unsigned int)(((min_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
      }
      if( max_phi_a[i] < high_phi ) 
      {
        high_bin = (unsigned int)(((max_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
      }
      unsigned int index = four_hits[i].index;
      for(unsigned int b=low_bin;b<=high_bin;++b)
      {
        for(unsigned int zbin=0;zbin<z_bins[index].size();++zbin)
        {
          unsigned int bin = BinEntryPair5D::linearBin(n_d, n_k, n_dzdl, n_z0, b, d_bin, k_bin, z_bins[index][zbin][0], z_bins[index][zbin][1]);
          bins_vec[zoomlevel]->push_back(BinEntryPair5D(bin, index));
        }
      }
    }
    else
    {
      if( ( high_phi < ( min_phi_a[i] + 2.*M_PI) ) && ( low_phi > max_phi_a[i] ) ){continue;}
      if( ( high_phi < ( min_phi_a[i] + 2.*M_PI) ) && ( low_phi <= max_phi_a[i] ) )
      {
        unsigned int low_bin = 0;
        unsigned int high_bin = (unsigned int)(((max_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
        unsigned int index = four_hits[i].index;
        for(unsigned int b=low_bin;b<=high_bin;++b)
        {
          for(unsigned int zbin=0;zbin<z_bins[index].size();++zbin)
          {
            unsigned int bin = BinEntryPair5D::linearBin(n_d, n_k, n_dzdl, n_z0, b, d_bin, k_bin, z_bins[index][zbin][0], z_bins[index][zbin][1]);
            bins_vec[zoomlevel]->push_back(BinEntryPair5D(bin, index));
          }
        }
      }
      else if( ( high_phi >= ( min_phi_a[i] + 2.*M_PI) ) && ( low_phi > max_phi_a[i] ) )
      {
        unsigned int high_bin = (n_phi-1);
        unsigned int low_bin = (unsigned int)(((2.*M_PI+min_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
        unsigned int index = four_hits[i].index;
        for(unsigned int b=low_bin;b<=high_bin;++b)
        {
          for(unsigned int zbin=0;zbin<z_bins[index].size();++zbin)
          {
            unsigned int bin = BinEntryPair5D::linearBin(n_d, n_k, n_dzdl, n_z0, b, d_bin, k_bin, z_bins[index][zbin][0], z_bins[index][zbin][1]);
            bins_vec[zoomlevel]->push_back(BinEntryPair5D(bin, index));
          }
        }
      }
      else
      {
        //split into two sets of bins
        
        unsigned int high_bin = (n_phi-1);
        unsigned int low_bin = (unsigned int)(((2.*M_PI+min_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
        unsigned int index = four_hits[i].index;
        for(unsigned int b=low_bin;b<=high_bin;++b)
        {
          for(unsigned int zbin=0;zbin<z_bins[index].size();++zbin)
          {
            unsigned int bin = BinEntryPair5D::linearBin(n_d, n_k, n_dzdl, n_z0, b, d_bin, k_bin, z_bins[index][zbin][0], z_bins[index][zbin][1]);
            bins_vec[zoomlevel]->push_back(BinEntryPair5D(bin, index));
          }
        }
        
        low_bin = 0;
        high_bin = (unsigned int)(((max_phi_a[i]-low_phi)*inv_phi_range)*((float)(n_phi)));
        index = four_hits[i].index;
        for(unsigned int b=low_bin;b<=high_bin;++b)
        {
          for(unsigned int zbin=0;zbin<z_bins[index].size();++zbin)
          {
            unsigned int bin = BinEntryPair5D::linearBin(n_d, n_k, n_dzdl, n_z0, b, d_bin, k_bin, z_bins[index][zbin][0], z_bins[index][zbin][1]);
            bins_vec[zoomlevel]->push_back(BinEntryPair5D(bin, index));
          }
        }
      }
    }
  }
}


void HelixHough::vote_sse(unsigned int zoomlevel)
{
//   for(unsigned int ca=0;ca<nhits_array_vec[zoomlevel]->size();ca++)
//   {
//     for(unsigned int d=0;d<(*(nhits_array_vec[zoomlevel]))[ca].size();d++)
//     {
//       for(unsigned int r=0;r<(*(nhits_array_vec[zoomlevel]))[ca][d].size();r++)
//       {
//         for(unsigned int th=0;th<(*(nhits_array_vec[zoomlevel]))[ca][d][r].size();th++)
//         {
//           for(unsigned int zz0=0;zz0<(*(nhits_array_vec[zoomlevel]))[ca][d][r][th].size();zz0++)
//           {
//             (*(nhits_array_vec[zoomlevel]))[ca][d][r][th][zz0] = 0;
//           }
//         }
//       }
//     }
//   }
  
  bins_vec[zoomlevel]->clear();
  
  unsigned int n_phi = (*(nhits_array_vec[zoomlevel])).size();
  unsigned int n_d = (*(nhits_array_vec[zoomlevel]))[0].size();
  unsigned int n_k = (*(nhits_array_vec[zoomlevel]))[0][0].size();
  unsigned int n_dzdl = (*(nhits_array_vec[zoomlevel]))[0][0][0].size();
  unsigned int n_z0 = (*(nhits_array_vec[zoomlevel]))[0][0][0][0].size();
  
  float z0_size = (zoomranges[zoomlevel].max_z0 - zoomranges[zoomlevel].min_z0)/((float)n_z0);
  float dzdl_size = (zoomranges[zoomlevel].max_dzdl - zoomranges[zoomlevel].min_dzdl)/((float)n_dzdl);
  float d_size = (zoomranges[zoomlevel].max_d - zoomranges[zoomlevel].min_d)/((float)n_d);
  float k_size = (zoomranges[zoomlevel].max_k - zoomranges[zoomlevel].min_k)/((float)n_k);
  float low_phi = zoomranges[zoomlevel].min_phi;
  float high_phi = zoomranges[zoomlevel].max_phi;
  float inv_phi_range = 1./(high_phi - low_phi);
  float low_z0 = zoomranges[zoomlevel].min_z0;
  float high_z0 = zoomranges[zoomlevel].max_z0;
  float z0_bin_size_inv = 1./z0_size;
  
  //cache cosine and sine calculations
  float min_cos = cos( zoomranges[zoomlevel].min_phi );
  float max_cos = cos( zoomranges[zoomlevel].max_phi );
  float min_sin = sin( zoomranges[zoomlevel].min_phi );
  float max_sin = sin( zoomranges[zoomlevel].max_phi );
  if( (high_phi - low_phi) > M_PI )
  {
    min_cos = 1.;
    max_cos = -1.;
    min_sin = 0.;
    max_sin = 0.;
  }
  
  
  vector<vector<vector<unsigned int> > > z_bins;
  vector<vector<unsigned int> > z_bin_1;
  vector<unsigned int> one_z_bin;one_z_bin.assign(2,0);
  //first, do the voting in z
  timeval t1,t2;
  double time1=0.;
  double time2=0.;    
  gettimeofday(&t1, NULL);
  for(unsigned int i=0;i<hits_vec[zoomlevel]->size();i++)
  {
    z_bin_1.clear();
    for(unsigned int th=0;th<n_dzdl;th++)
    {
      float min_dzdl = zoomranges[zoomlevel].min_dzdl + ((float)(th))*dzdl_size;
      float max_dzdl = min_dzdl + dzdl_size;
      
      float min_z0=0.;
      float max_z0=0.;
      z0Range_sse((*(hits_vec[zoomlevel]))[i], min_cos, min_sin, max_cos, max_sin, zoomranges[zoomlevel].min_k, zoomranges[zoomlevel].max_k, zoomranges[zoomlevel].min_phi, zoomranges[zoomlevel].max_phi, zoomranges[zoomlevel].min_d, zoomranges[zoomlevel].max_d, min_dzdl, max_dzdl, min_z0, max_z0);
      
      unsigned int low_bin=0;
      unsigned int high_bin=0;
      if((min_z0 > high_z0) || (max_z0 < low_z0))
      {
        low_bin=1;
        high_bin=0;
      }
      else
      {
        if(min_z0 > low_z0)
        {
          low_bin = (unsigned int)((min_z0 - low_z0)*z0_bin_size_inv);
        }
        else
        {
          low_bin = 0;
        }
        
        if(max_z0 < high_z0)
        {
          high_bin = (unsigned int)((max_z0 - low_z0)*z0_bin_size_inv);
        }
        else
        {
          high_bin = n_dzdl - 1;
        }
      }
      one_z_bin[0] = th;
      for(unsigned int bb=low_bin;bb<=high_bin;bb++)
      {
        if(bb >= n_z0){continue;}
        one_z_bin[1] = bb;
        z_bin_1.push_back(one_z_bin);
      }
    }
    z_bins.push_back(z_bin_1);
  }
  gettimeofday(&t2, NULL);
  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec)/1000000.);
  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec)/1000000.);
  z_vote_time += (time2 - time1);
  
  
  //now vote in xy
  SimpleHit3D temphit(0.,0., 0.,0., 0.,0., 0, 0);
  vector<SimpleHit3D> four_hits;four_hits.assign(4, temphit);
  unsigned int hit_counter = 0;
  float x_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  float y_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  
  float min_phi_1_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  float max_phi_1_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  float min_phi_2_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  float max_phi_2_a[4] __attribute__((aligned(16))) = {0.,0.,0.,0.};
  float min_k = zoomranges[zoomlevel].min_k;
  float max_k = min_k + k_size;
  //TODO protect against (d + 1/k)<0
  gettimeofday(&t1, NULL);
  for(unsigned int k_bin=0;k_bin<n_k;++k_bin)
  {
    float min_k_a[4] __attribute__((aligned(16))) = {min_k,min_k,min_k,min_k};
    float max_k_a[4] __attribute__((aligned(16))) = {max_k,max_k,max_k,max_k};
    float min_d = zoomranges[zoomlevel].min_d;
    float max_d = min_d + d_size;
    for(unsigned int d_bin=0;d_bin<n_d;++d_bin)
    {
      float min_d_a[4] __attribute__((aligned(16))) = {min_d,min_d,min_d,min_d};
      float max_d_a[4] __attribute__((aligned(16))) = {max_d,max_d,max_d,max_d};
      for(unsigned int i=0;i<hits_vec[zoomlevel]->size();i++)
      {
        // check whether this hit could make it into any phi bin
        // if sqrt(x*x + y*y) > (2/k + d), then no solutions
        // equivalently, if (x*x + y*y)*k*k > (2 + dk)^2
        float x = (*(hits_vec[zoomlevel]))[i].x;
        float y = (*(hits_vec[zoomlevel]))[i].y;
        if( ( (x*x + y*y)*min_k*min_k ) > ( (2. + max_d*min_k)*(2. + max_d*min_k) ) ){continue;}
        // if x*x + y*y < d*d there are no solutions
        if( ( (x*x + y*y) ) < ( min_d*min_d ) ){continue;}
        four_hits[hit_counter] = ((*(hits_vec[zoomlevel]))[i]);
        x_a[hit_counter] = four_hits[hit_counter].x;
        y_a[hit_counter] = four_hits[hit_counter].y;
        four_hits[hit_counter].index = i;
        hit_counter++;
        if(hit_counter == 4)
        {
          HelixHough::phiRange_sse(x_a, y_a, min_d_a, max_d_a, min_k_a, max_k_a, min_phi_1_a, max_phi_1_a, min_phi_2_a, max_phi_2_a);
          
          for(unsigned int h=0;h<hit_counter;++h)
          {
            float dphi = sqrt((four_hits[h].dx*four_hits[h].dx + four_hits[h].dy*four_hits[h].dy)/(four_hits[h].x*four_hits[h].x + four_hits[h].y*four_hits[h].y));
            
            min_phi_1_a[h] -= dphi;
            min_phi_2_a[h] -= dphi;
            max_phi_1_a[h] += dphi;
            max_phi_2_a[h] += dphi;
            
          }
          
          fillBins(hit_counter, min_phi_1_a, max_phi_1_a, four_hits, z_bins, n_d, n_k, n_dzdl, n_z0, d_bin, k_bin, n_phi, zoomlevel, low_phi, high_phi, inv_phi_range);
          fillBins(hit_counter, min_phi_2_a, max_phi_2_a, four_hits, z_bins, n_d, n_k, n_dzdl, n_z0, d_bin, k_bin, n_phi, zoomlevel, low_phi, high_phi, inv_phi_range);
          hit_counter = 0;
        }
      }
      if(hit_counter != 0)
      {
        HelixHough::phiRange_sse(x_a, y_a, min_d_a, max_d_a, min_k_a, max_k_a, min_phi_1_a, max_phi_1_a, min_phi_2_a, max_phi_2_a);
        
        for(unsigned int h=0;h<hit_counter;++h)
        {
          float dphi = sqrt((four_hits[h].dx*four_hits[h].dx + four_hits[h].dy*four_hits[h].dy)/(four_hits[h].x*four_hits[h].x + four_hits[h].y*four_hits[h].y));
          
          min_phi_1_a[h] -= dphi;
          min_phi_2_a[h] -= dphi;
          max_phi_1_a[h] += dphi;
          max_phi_2_a[h] += dphi;
        }
        
        fillBins(hit_counter, min_phi_1_a, max_phi_1_a, four_hits, z_bins, n_d, n_k, n_dzdl, n_z0, d_bin, k_bin, n_phi, zoomlevel, low_phi, high_phi, inv_phi_range);
        fillBins(hit_counter, min_phi_2_a, max_phi_2_a, four_hits, z_bins, n_d, n_k, n_dzdl, n_z0, d_bin, k_bin, n_phi, zoomlevel, low_phi, high_phi, inv_phi_range);
        hit_counter = 0;
      }
      min_d += d_size;
      max_d += d_size;
    }
    min_k += k_size;
    max_k += k_size;
  }
  gettimeofday(&t2, NULL);
  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec)/1000000.);
  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec)/1000000.);
  xy_vote_time += (time2 - time1);
  
  sort(bins_vec[zoomlevel]->begin(), bins_vec[zoomlevel]->end());
}
